import os
from jieba import lcut
from collections import Counter

root_dir = '校新闻'  # data directory: one subdirectory per news article, each holding "<name>/<name>.txt"

def main():
    """Read every article under ``root_dir``, segment the combined text with
    jieba, and print the 10 most frequent words longer than one character.

    Each subdirectory of ``root_dir`` is expected to contain a text file named
    after the subdirectory itself (``<name>/<name>.txt``); entries without
    that file are skipped.
    """
    sentences = []
    # os.path.join makes the paths portable; the original code built them
    # with literal backslashes, which only works on Windows.
    for one_news in os.listdir(root_dir):
        article_path = os.path.join(root_dir, one_news, f'{one_news}.txt')
        if not os.path.exists(article_path):
            continue  # no text file for this entry — skip it
        with open(article_path, encoding='utf-8') as fp:
            sentences.extend(fp)

    text = ''.join(sentences)  # merge all lines into a single string
    words = lcut(text)  # jieba word segmentation
    # Drop tokens of length <= 1 (mostly punctuation and single characters).
    words = filter(lambda word: len(word) > 1, words)
    freq = Counter(words)  # count occurrences of each word
    print(freq.most_common(10))

if __name__ == "__main__":
    if not os.path.isdir(root_dir):
        print("无数据，无法分析")
    else:
        main()

